# Load the APCRA prospective chemical list and tidy it: drop the first
# column, name the V2-V4 columns, discard the first data row, and tag every
# remaining record as belonging to the prospective ('Pro') list.
apcra.pro <- fread("./source/chem/apcra_pro.csv")
set(apcra.pro, j = 1L, value = NULL)
setnames(
  apcra.pro,
  old = c("V2", "V3", "V4"),
  new = c("DTXSID", "CASRN", "preferred_name")
)
apcra.pro <- apcra.pro[-1L]
set(apcra.pro, j = "list", value = "Pro")
# Flag prospective chemicals that also occur in the APCRA retrospective case
# study (these will likely be data-rich): apcra.ret is 1 when the DTXSID is
# found in the retrospective workbook and 0 otherwise.
apcra.ret <- as.data.table(
  read.xlsx(
    "./source/chem/Supp_File_2_pod_ratio_master_final.xlsx",
    sheet = 1
  )
)
apcra.ret.dtxsids <- apcra.ret[["DTXSID"]]
apcra.pro[, apcra.ret := as.numeric(DTXSID %in% apcra.ret.dtxsids)]
# Pull every ToxVal v9.4 record joined to its chemical and species rows, keep
# only the APCRA prospective chemicals, and cache the result as an RData file
# (which is then read straight back in). Finish with a tally of the QC status.
toxval_v9_4 <- as.data.table(dbGetQuery(con, "SELECT * FROM res_toxval_v94.toxval INNER JOIN
res_toxval_v94.chemical ON toxval.dtxsid=chemical.dtxsid INNER JOIN
res_toxval_v94.species ON toxval.species_id=species.species_id;"))
toxval_v9_4 <- toxval_v9_4[dtxsid %in% apcra.pro$DTXSID]
save(toxval_v9_4, file = "./source/toxval_v9_4_apcra_pro_full.RData")
load(file = "./source/toxval_v9_4_apcra_pro_full.RData")
table(toxval_v9_4[["qc_status"]])
##
## fail:human_eco not specified fail:toxval_numeric is null
## 37 2084
## fail:toxval_type not specified fail:toxval_units not specified
## 1559 143
## pass
## 89652
# Keep only records whose QC status is 'pass', then list the available columns.
# Earlier approach, kept for reference:
#toxval.apcra <- toxval_v9_1[qa_status==1]
toxval.apcra <- toxval_v9_4[qc_status == "pass"]
names(toxval.apcra)
## [1] "toxval_id" "source_hash"
## [3] "source_table" "chemical_id"
## [5] "dtxsid" "source"
## [7] "subsource" "source_url"
## [9] "subsource_url" "details_text"
## [11] "priority_id" "qc_status"
## [13] "risk_assessment_class" "human_eco"
## [15] "toxval_type" "toxval_type_original"
## [17] "toxval_subtype" "toxval_subtype_original"
## [19] "toxval_numeric" "toxval_numeric_original"
## [21] "toxval_numeric_converted" "toxval_numeric_standard"
## [23] "toxval_numeric_human" "toxval_units"
## [25] "toxval_units_original" "toxval_units_converted"
## [27] "toxval_units_standard" "toxval_units_human"
## [29] "toxval_numeric_qualifier" "toxval_numeric_qualifier_original"
## [31] "study_type" "study_type_original"
## [33] "study_duration_class" "study_duration_class_original"
## [35] "study_duration_value" "study_duration_value_original"
## [37] "study_duration_units" "study_duration_units_original"
## [39] "species_id" "species_original"
## [41] "strain" "strain_original"
## [43] "strain_group" "habitat"
## [45] "sex" "sex_original"
## [47] "critical_effect" "critical_effect_original"
## [49] "population" "population_original"
## [51] "exposure_route" "exposure_route_original"
## [53] "exposure_method" "exposure_method_original"
## [55] "exposure_form" "exposure_form_original"
## [57] "media" "media_original"
## [59] "lifestage" "lifestage_original"
## [61] "generation" "generation_original"
## [63] "year" "year_original"
## [65] "mw" "datestamp"
## [67] "source_source_id" "toxval_uuid"
## [69] "toxval_hash" "dtxsid"
## [71] "casrn" "name"
## [73] "species_id" "common_name"
## [75] "latin_name" "kingdom"
## [77] "phylum_division" "subphylum_div"
## [79] "superclass" "class"
## [81] "tax_order" "family"
## [83] "genus" "species"
## [85] "subspecies" "variety"
## [87] "ecotox_group" "habitat"
# Preview the first rows and count records per human_eco category.
head(toxval.apcra, n = 6L)
#table(toxval.apcra$species_supercategory)
table(toxval.apcra[["human_eco"]])
##
## eco human health
## 71524 18128
# Restrict to human-health records and tabulate their risk assessment classes.
toxval.apcra <- toxval.apcra[human_eco == "human health"]
table(toxval.apcra[["risk_assessment_class"]])
##
## acute air quality standard
## 2181 270
## chronic clinical
## 4595 4
## clinical study developmental
## 2 1622
## dose selection drinking water standard
## 5 49
## exposure limit genetics
## 18 8
## genotoxicity growth
## 152 17
## Hershberger human
## 35 1
## immunotoxicity in vitro
## 106 271
## morphology mortality
## 22 143
## neurotoxicity neurotoxicity short-term
## 300 2
## neurotoxicity subchronic other
## 3 95
## physchem repeat dose other
## 78 31
## reproduction reproduction developmental
## 2009 7
## short-term soil quality standard
## 1894 185
## special toxicology study subchronic
## 23 3313
## uterotrophic water quality standard
## 89 598
# Keep only the risk assessment classes listed below, then tabulate the test
# species (common_name) that remain.
toxval.apcra <- toxval.apcra[risk_assessment_class %in% c(
  "chronic", "developmental", "Hershberger", "immunotoxicity",
  "neurotoxicity", "neurotoxicity short-term", "neurotoxicity subchronic",
  "repeat dose other", "reproduction", "reproduction developmental",
  "short-term", "subchronic", "uterotrophic"
)]
table(toxval.apcra[["common_name"]])
##
## American Mink American Short-Tailed Shrews
## 62 8
## Black Rat Cat
## 47 1
## Common Redbacked Vole Common Shrew
## 2 23
## Cow Family Coyote
## 6 1
## Deer Mouse Desert Cottontail
## 58 34
## Dog Dog, Rat
## 1342 2
## Domestic Cat Domestic Goat
## 6 3
## Domestic Sheep Domesticated Cattle
## 66 16
## Eastern Cottontail European Pine Vole
## 15 1
## European Rabbit Golden Hamster
## 117 22
## Grasshopper Mouse Gray-Tailed Vole
## 5 8
## Guinea Pig Hamster
## 14 96
## Harvest Mouse Human
## 1 5
## Human (RA) Indian Desert Jird
## 546 5
## Little Brown Bat Long-Tailed Field Mouse
## 42 4
## Mammals Meadow Vole
## 1 8
## Mink Monkey
## 9 10
## Montane Shrew Mouse
## 34 2422
## Mouse, Rat Pig
## 14 4
## Pine Vole Prairie Vole
## 6 11
## Pygmy Wood Mouse Rabbit
## 1 652
## Rabbit, Rat Rat
## 2 8178
## Red Fox River Otter
## 42 8
## Root Vole Shaw's Jird
## 1 1
## Sheep Water Buffalo
## 5 14
## Water Buffalos White-Footed Mouse
## 5 11
## White-Tailed Deer Woodchuck
## 8 1
# Display the records that come from the two wildlife-oriented sources, then
# list the distinct species names present.
toxval.apcra[source %in% c("DOE Wildlife Benchmarks", "ECOTOX")]
unique(toxval.apcra[["common_name"]])
## [1] "Human (RA)" "Human"
## [3] "Rat" "Mouse"
## [5] "Rabbit" "Dog"
## [7] "Domesticated Cattle" "Domestic Sheep"
## [9] "Rabbit, Rat" "European Rabbit"
## [11] "Mouse, Rat" "Deer Mouse"
## [13] "Black Rat" "Monkey"
## [15] "Little Brown Bat" "American Short-Tailed Shrews"
## [17] "White-Footed Mouse" "Meadow Vole"
## [19] "Mink" "Red Fox"
## [21] "River Otter" "White-Tailed Deer"
## [23] "Desert Cottontail" "Montane Shrew"
## [25] "Water Buffalo" "Golden Hamster"
## [27] "Domestic Goat" "Sheep"
## [29] "American Mink" "Hamster"
## [31] "Cow Family" "Prairie Vole"
## [33] "Gray-Tailed Vole" "Cat"
## [35] "Guinea Pig" "Indian Desert Jird"
## [37] "Water Buffalos" "Shaw's Jird"
## [39] "Common Shrew" "Mammals"
## [41] "Pig" "Woodchuck"
## [43] "Domestic Cat" "Long-Tailed Field Mouse"
## [45] "Common Redbacked Vole" "Root Vole"
## [47] "European Pine Vole" "Pygmy Wood Mouse"
## [49] "Harvest Mouse" "Eastern Cottontail"
## [51] "Coyote" "Pine Vole"
## [53] "Dog, Rat" "Grasshopper Mouse"
# Keep only the species names listed below and re-tabulate.
toxval.apcra <- toxval.apcra[common_name %in% c(
  "Human (RA)", "Human", "Rat", "Mouse", "Rabbit", "Dog", "Rabbit, Rat",
  "European Rabbit", "Mouse, Rat", "Black Rat", "Monkey", "Hamster",
  "Guinea Pig", "Dog, Rat"
)]
table(toxval.apcra[["common_name"]])
##
## Black Rat Dog Dog, Rat European Rabbit Guinea Pig
## 47 1342 2 117 14
## Hamster Human Human (RA) Monkey Mouse
## 96 5 546 10 2422
## Mouse, Rat Rabbit Rabbit, Rat Rat
## 14 652 2 8178
# Display the human records, then count records per study type.
toxval.apcra[common_name %in% c("Human", "Human (RA)")]
table(toxval.apcra[["study_type"]])
##
## - avoidance
## 2 1
## chronic developmental
## 4266 1611
## Hershberger immunotoxicity
## 35 101
## neurotoxicity neurotoxicity short-term
## 256 2
## neurotoxicity subchronic repeat dose other
## 3 31
## reproduction reproduction developmental
## 1968 7
## short-term subchronic
## 1839 3236
## uterotrophic
## 89
#unique(toxval.apcra$study_type)
# Keep only the study types listed below, then show which of them are
# actually present after filtering.
toxval.apcra <- toxval.apcra[study_type %in% c(
  "chronic", "short-term", "subchronic", "noncancer", "developmental",
  "repeat dose other", "reproduction", "neurotoxicity", "immunotoxicity",
  "uterotrophic", "neurotoxicity short-term", "Hershberger",
  "reproduction developmental", "neurotoxicity subchronic"
)]
unique(toxval.apcra[["study_type"]])
## [1] "chronic" "short-term"
## [3] "subchronic" "developmental"
## [5] "repeat dose other" "reproduction"
## [7] "neurotoxicity" "immunotoxicity"
## [9] "uterotrophic" "neurotoxicity short-term"
## [11] "Hershberger" "reproduction developmental"
## [13] "neurotoxicity subchronic"
# Cross-tabulate exposure method against exposure route.
table(toxval.apcra[, .(exposure_method, exposure_route)])
## exposure_route
## exposure_method - dermal environmental inhalation injection multiple
## - 378 131 4 55 468 1
## aerosol 0 0 0 38 0 0
## capsule 3 0 0 0 0 0
## culture 0 0 7 0 0 0
## diet 0 0 0 0 0 0
## driniking water 0 0 0 0 0 0
## drinking water 10 0 0 0 0 0
## dust 0 0 0 2 0 0
## feed 43 0 0 0 0 0
## gavage 241 0 0 0 0 0
## Gelatin capsules 0 0 0 0 0 0
## injection 0 0 0 0 0 0
## media mixture 0 0 4 0 0 0
## oral 0 0 0 0 0 0
## topical 0 48 0 0 0 0
## unspecified 0 0 0 0 0 0
## vapor 0 0 0 29 0 0
## exposure_route
## exposure_method Not Reported oral subcutaneous
## - 8 3005 27
## aerosol 0 0 0
## capsule 0 378 0
## culture 0 0 0
## diet 0 75 0
## driniking water 0 4 0
## drinking water 0 41 0
## dust 0 0 0
## feed 0 6558 0
## gavage 0 1869 0
## Gelatin capsules 0 4 0
## injection 0 0 10
## media mixture 0 0 0
## oral 0 1 0
## topical 0 0 0
## unspecified 0 0 2
## vapor 0 0 0
# For records whose exposure route is '-', tabulate the originally reported
# exposure method.
table(
  toxval.apcra[exposure_route == "-", .(exposure_route, exposure_method_original)]
)
## exposure_method_original
## exposure_route - capsule drinking water feed gavage unspecified
## - 368 3 10 43 241 10
# Keep records whose exposure route is 'oral' or '-', display the rows where
# both route and method are '-', and tabulate the reported units.
toxval.apcra <- toxval.apcra[exposure_route %in% c("-", "oral")]
toxval.apcra[exposure_route == "-" & exposure_method %in% "-"]
table(toxval.apcra[["toxval_units"]])
##
## % % diet % w/v (mg/kg-day)-1 (mg/m3)-1
## 104 61 6 42 10
## g in diet g/kg bdwt/d g/L mg mg/100 g bw
## 2 3 10 4 14
## mg/animal mg/day mg/kg mg/kg-day mg/kg diet
## 2 7 67 12021 59
## mg/kg diet/d mg/kg/org mg/L mg/org mg/org-day
## 1 2 6 2 50
## ml/kg mL/kg-day mM ng/mL ppb diet
## 2 1 8 2 29
## ppm ug/kg bdwt ug/kg bdwt/d ug/org-day uM/g
## 51 4 11 4 13
## uM/kg bw
## 12
# Keep only the units listed below, then re-tabulate the units.
# NOTE(review): the literal 'g/ kg bdwt/d' (with a space) does not match the
# 'g/kg bdwt/d' level shown in the preceding units table, so those records are
# dropped here. Confirm whether that is intended; no g/kg conversion exists
# downstream, so correcting the literal would also require adding one.
toxval.apcra <- toxval.apcra[toxval_units %in% c(
  "%", "% diet", "(mg/kg-day)-1", "g/ kg bdwt/d", "g/L", "mg/kg",
  "mg/kg-day", "mg/kg diet", "mg/kg diet/d", "ppb diet", "ppm",
  "ug/kg bdwt", "ug/kg bdwt/d"
)]
table(toxval.apcra[["toxval_units"]])
##
## % % diet (mg/kg-day)-1 g/L mg/kg
## 104 61 42 10 67
## mg/kg-day mg/kg diet mg/kg diet/d ppb diet ppm
## 12021 59 1 29 51
## ug/kg bdwt ug/kg bdwt/d
## 4 11
# Create the working columns that the unit conversions below overwrite:
# toxval.apcra.unit starts as the reported unit and toxval.apcra.mkd as the
# reported numeric value. Then tabulate the species of the 'ppm' records.
toxval.apcra[, `:=`(
  toxval.apcra.unit = toxval_units,
  toxval.apcra.mkd = toxval_numeric
)]
table(toxval.apcra[toxval_units == "ppm", common_name])
##
## Black Rat Hamster Mouse Rat
## 27 2 4 18
# Drop records whose numeric value is -999, then check the remaining value
# range and the unit counts.
# (didn't change after filtering for qa_status)
toxval.apcra <- toxval.apcra[toxval_numeric != -999]
range(toxval.apcra[["toxval_numeric"]])
table(toxval.apcra[, .(toxval_units)])
## toxval_units
## % % diet (mg/kg-day)-1 g/L mg/kg
## 104 61 42 10 67
## mg/kg-day mg/kg diet mg/kg diet/d ppb diet ppm
## 12021 59 1 29 51
## ug/kg bdwt ug/kg bdwt/d
## 4 11
# Convert ug/kg body-weight values to mg/kg-day (mkd): divide by 1000 and
# relabel the working unit. Only the two ug-based units are handled here.
toxval.apcra[
  toxval_units %in% c("ug/kg bdwt", "ug/kg bdwt/d"),
  `:=`(
    toxval.apcra.mkd = toxval_numeric/1000,
    toxval.apcra.unit = "mg/kg-day"
  )
]
# Species and exposure method of the records reported as a percentage.
table(toxval.apcra[toxval_units %in% c("% diet", "%"), .(exposure_method, common_name)])
## common_name
## exposure_method Hamster Mouse Rat
## - 0 54 94
## feed 1 3 13
# Records reported simply as '%' are assumed to mean % in feed.
# Percent in feed/diet is converted to mg/kg-day (mkd) in two steps:
#   1. % -> ppm in diet: multiply by 10,000 (1% in diet = 10,000 ppm);
#   2. ppm -> mkd: multiply by the species-specific 1 ppm-to-mkd factor
#      (dog 0.025, rat 0.05, mouse 0.15, rabbit 0.03).
toxval.apcra[
  toxval_units %in% c("%", "% diet") & common_name %in% c("dog", "Dog"),
  toxval.apcra.mkd := (toxval_numeric*10000)*0.025
]
toxval.apcra[
  toxval_units %in% c("%", "% diet") & common_name %in% c("rat", "Rat"),
  toxval.apcra.mkd := (toxval_numeric*10000)*0.05
]
toxval.apcra[
  toxval_units %in% c("%", "% diet") & common_name %in% c("mouse", "Mouse"),
  toxval.apcra.mkd := (toxval_numeric*10000)*0.15
]
toxval.apcra[
  toxval_units %in% c("%", "% diet") & common_name %in% c("rabbit", "Rabbit"),
  toxval.apcra.mkd := (toxval_numeric*10000)*0.03
]
# Hamster records reported as a percentage are removed rather than converted.
toxval.apcra <- toxval.apcra[
  !(toxval_units %in% c("%", "% diet")) | !(common_name %in% c("Hamster"))
]
toxval.apcra[toxval_units %in% c("%", "% diet"), toxval.apcra.unit := "mg/kg-day"]
# Species of the records reported in ppm or ppb.
table(toxval.apcra[toxval_units %in% c("ppm", "ppb"), .(toxval_units, common_name)])
## common_name
## toxval_units Black Rat Hamster Mouse Rat
## ppm 27 2 4 18
# Convert ppm values to mg/kg-day (mkd) with a species-specific factor:
# Rat and Black Rat 0.05, Mouse 0.15, Hamster 0.094. The commented-out lines
# are conversions for other species that are not applied here.
#toxval.apcra[toxval_units=='ppm' & species_common=='dog', toxval.apcra.mkd := toxval_numeric*0.025]
toxval.apcra[
  toxval_units == "ppm" & common_name %in% c("Rat", "Black Rat"),
  toxval.apcra.mkd := toxval_numeric*0.05
]
toxval.apcra[
  toxval_units == "ppm" & common_name == "Mouse",
  toxval.apcra.mkd := toxval_numeric*0.15
]
#toxval.apcra[toxval_units=='ppm' & species_common=='rabbit', toxval.apcra.mkd := toxval_numeric*0.03]
#toxval.apcra[toxval_units=='ppm' & species_common=='guinea pig', toxval.apcra.mkd := toxval_numeric*0.040]
toxval.apcra[
  toxval_units == "ppm" & common_name == "Hamster",
  toxval.apcra.mkd := toxval_numeric*0.094
]
toxval.apcra[toxval_units == "ppm", toxval.apcra.unit := "mg/kg-day"]
# Next: ppb diet. First see which species report in that unit.
table(toxval.apcra[toxval_units == "ppb diet", .(common_name, toxval_units)])
## toxval_units
## common_name ppb diet
## Mouse 20
## Rat 9
# Convert 'ppb diet' values to mg/kg-day (mkd): ppb * 0.001 gives ppm in diet,
# which is then scaled by the species-specific ppm-to-mkd factor.
# Fix: the mouse factor was written as 0.015, but every other mouse conversion
# in this script ('%', 'ppm', 'mg/kg diet') uses 0.15, so mouse ppb-diet values
# came out ten-fold too low.
toxval.apcra[toxval_units=='ppb diet' & common_name=='Mouse', toxval.apcra.mkd := toxval_numeric*0.001*0.15]
toxval.apcra[toxval_units=='ppb diet' & common_name=='Rat', toxval.apcra.mkd := toxval_numeric*0.001*0.05]
toxval.apcra[toxval_units=='ppb diet', toxval.apcra.unit := 'mg/kg-day']
# Species of the records reported per kg of diet, converted in the next step.
table(toxval.apcra[toxval_units %in% c('mg/kg diet','mg/kg diet/d'), c('toxval_units','common_name')])
## common_name
## toxval_units Dog Hamster Mouse Rat
## mg/kg diet 1 13 24 21
## mg/kg diet/d 0 0 0 1
# toxval.apcra[toxval_units=='mg/kg'] # somewhat impossible to know if this is in diet or kg-bw; on inspection it mostly seems like mg/kg-bw
# Convert mg/kg diet to mg/kg-day (mkd), treating 1 mg/kg diet as ~1 ppm in
# diet and applying the species factors (dog 0.025, rat 0.05, mouse 0.15,
# hamster 0.094). 'mg/kg diet/d' is converted for rats only.
toxval.apcra[
  toxval_units == "mg/kg diet" & common_name == "Dog",
  toxval.apcra.mkd := toxval_numeric*0.025
]
toxval.apcra[
  toxval_units %in% c("mg/kg diet", "mg/kg diet/d") & common_name == "Rat",
  toxval.apcra.mkd := toxval_numeric*0.05
]
toxval.apcra[
  toxval_units == "mg/kg diet" & common_name == "Mouse",
  toxval.apcra.mkd := toxval_numeric*0.15
]
toxval.apcra[
  toxval_units == "mg/kg diet" & common_name == "Hamster",
  toxval.apcra.mkd := toxval_numeric*0.094
]
toxval.apcra[
  toxval_units %in% c("mg/kg diet", "mg/kg diet/d"),
  toxval.apcra.unit := "mg/kg-day"
]
# g/L: only rat values are converted (x1000, then the rat factor 0.05), but
# every g/L record is relabelled as mg/kg-day.
toxval.apcra[
  toxval_units == "g/L" & common_name == "Rat",
  toxval.apcra.mkd := toxval_numeric*1000*0.05
]
toxval.apcra[toxval_units %in% "g/L", toxval.apcra.unit := "mg/kg-day"]
table(toxval.apcra[["toxval.apcra.unit"]])
##
## (mg/kg-day)-1 mg/kg mg/kg-day
## 42 67 12350
# Count records per toxicity value type before filtering on it.
table(toxval.apcra[["toxval_type"]])
##
## BMD BMDL
## 38 41
## BMDL (0.5 SD) BMDL (05)
## 2 2
## BMDL (10) cancer slope factor
## 3 43
## ED3 ED30
## 1 2
## ED50 HNEL
## 14 88
## IC50 LC50
## 1 18
## LD0 LD100
## 3 3
## LD16 LD50
## 3 17
## LD84 LEL
## 3 2277
## LOAEC LOAEL
## 1 2273
## LOAEL (HED) LOEC
## 3 14
## LOEL MRL
## 982 45
## NEL NOAEC
## 2003 2
## NOAEL NOEC
## 2914 10
## NOEL NTD
## 1342 1
## RfD RfD (screening chronic)
## 279 1
## RfD (screening subchronic) SRfDo
## 1 24
## T25 TDLo
## 2 3
#unique(toxval.apcra$toxval_type)
# Keep only the toxicity value types listed below and re-tabulate.
# NOTE(review): 'BMDL10' and 'NOAEL ' do not appear among the levels tabulated
# just above, whereas 'BMDL (10)' does and is dropped here - confirm intent.
toxval.apcra <- toxval.apcra[toxval_type %in% c(
  "BMD", "BMDL", "BMDL10", "HNEL", "LEL", "LOAEL", "NEL", "NOAEL", "NOAEL ",
  "NOEL",
  # remove these later but want to see them first
  "RfD", "cancer slope factor"
)]
table(toxval.apcra[["toxval_type"]])
##
## BMD BMDL cancer slope factor HNEL
## 38 41 43 88
## LEL LOAEL NEL NOAEL
## 2277 2273 2003 2914
## NOEL RfD
## 1342 279
# Histogram of all converted values on the log10 scale.
ggplot(
  data = toxval.apcra[, .(toxval.apcra.mkd)],
  mapping = aes(x = log10(toxval.apcra.mkd))
) +
  geom_histogram(bins = 50) +
  theme_bw() +
  xlab("Toxval log10-mg/kg/day values")

# Drop RfDs and cancer slope factors, then redraw the histogram.
toxval.apcra.norfds <- toxval.apcra[
  !(toxval_type %in% c("RfD", "cancer slope factor"))
]
ggplot(
  data = toxval.apcra.norfds[, .(toxval.apcra.mkd)],
  mapping = aes(x = log10(toxval.apcra.mkd))
) +
  geom_histogram(bins = 50) +
  theme_bw()

# Records with very low values (< 0.01), and the number of distinct chemicals.
lo.values <- toxval.apcra.norfds[toxval.apcra.mkd < 0.01]
length(unique(toxval.apcra.norfds[["dtxsid"]])) #165 dtxsids
## [1] 165
# Inspect one chemical's low values and its value distribution.
lo.values[dtxsid == "DTXSID6020062"]
ggplot(
  data = toxval.apcra.norfds[dtxsid == "DTXSID6020062", .(toxval.apcra.mkd)],
  mapping = aes(x = log10(toxval.apcra.mkd))
) +
  geom_histogram(bins = 50) +
  theme_bw()

# Per-chemical summary of the converted values: minimum, the 5th to 30th
# percentiles in steps of five, maximum, median, mean, standard deviation and
# record count.
toxval.apcra.summary <- unique(
  toxval.apcra.norfds[, .(
    min.toxval.numeric = min(toxval.apcra.mkd),
    p5.toxval.numeric = quantile(toxval.apcra.mkd, probs = 0.05),
    p10.toxval.numeric = quantile(toxval.apcra.mkd, probs = 0.10),
    p15.toxval.numeric = quantile(toxval.apcra.mkd, probs = 0.15),
    p20.toxval.numeric = quantile(toxval.apcra.mkd, probs = 0.20),
    p25.toxval.numeric = quantile(toxval.apcra.mkd, probs = 0.25),
    p30.toxval.numeric = quantile(toxval.apcra.mkd, probs = 0.30),
    max.toxval.numeric = max(toxval.apcra.mkd),
    median.toxval.numeric = median(toxval.apcra.mkd),
    mean.toxval.numeric = mean(toxval.apcra.mkd),
    stdev.toxval.numeric = sd(toxval.apcra.mkd),
    number.toxval.numeric = .N
  ), by = .(dtxsid, casrn, name)]
)
table(toxval.apcra.norfds[["risk_assessment_class"]])
##
## chronic developmental
## 3687 1529
## Hershberger immunotoxicity
## 7 38
## neurotoxicity neurotoxicity short-term
## 98 2
## neurotoxicity subchronic repeat dose other
## 3 28
## reproduction reproduction developmental
## 1862 7
## short-term subchronic
## 877 2801
## uterotrophic
## 37
# updating from study_duration_class to risk_assessment_class increases the number of chemicals with subchronic PODs from 9 to 160
# Same per-chemical summary as above, restricted to the 'subchronic',
# 'repeat dose other' and 'short-term' risk assessment classes; the column
# names carry a '.sub' suffix.
toxval.apcra.summary.subchronic <- unique(
  toxval.apcra.norfds[
    risk_assessment_class %in% c("subchronic", "repeat dose other", "short-term"),
    .(
      min.toxval.numeric.sub = min(toxval.apcra.mkd),
      p5.toxval.numeric.sub = quantile(toxval.apcra.mkd, probs = 0.05),
      p10.toxval.numeric.sub = quantile(toxval.apcra.mkd, probs = 0.10),
      p15.toxval.numeric.sub = quantile(toxval.apcra.mkd, probs = 0.15),
      p20.toxval.numeric.sub = quantile(toxval.apcra.mkd, probs = 0.20),
      p25.toxval.numeric.sub = quantile(toxval.apcra.mkd, probs = 0.25),
      p30.toxval.numeric.sub = quantile(toxval.apcra.mkd, probs = 0.30),
      max.toxval.numeric.sub = max(toxval.apcra.mkd),
      median.toxval.numeric.sub = median(toxval.apcra.mkd),
      mean.toxval.numeric.sub = mean(toxval.apcra.mkd),
      stdev.toxval.numeric.sub = sd(toxval.apcra.mkd),
      number.toxval.numeric.sub = .N
    ),
    by = .(dtxsid, casrn, name)
  ]
)
# Attach the retrospective 5th-percentile POD to each chemical (matched on
# DTXSID) and compute the log10 difference: prospective minus retrospective.
toxval.apcra.summary$apcra.ret.5p.POD <- apcra.ret$p5.POD[
  match(toxval.apcra.summary$dtxsid, apcra.ret$DTXSID)
]
toxval.apcra.summary[
  , diff := as.numeric(log10(p5.toxval.numeric) - log10(apcra.ret.5p.POD))
]
# Plot label: the chemical name when the difference is available and not
# greater than -0.5; an empty string otherwise (including missing differences).
toxval.apcra.summary[
  , label := fifelse(!is.na(diff) & diff <= -0.5, name, "")
]
# Chemicals whose prospective value is more than 0.5 log10 units lower.
big.diffs <- toxval.apcra.summary[diff < -0.5, dtxsid]
# Number of chemicals with a retrospective value to compare against.
length(unique(toxval.apcra.summary[!is.na(diff), dtxsid]))
## [1] 96
#library(ggrepel)
# Scatter of the retrospective 5th-percentile POD (x) against the prospective
# ToxVal 5th percentile (y), both on log10 axes. The solid line has slope 1 and
# intercept 0; the dashed lines are offset by +/- 0.5. Points carry the text in
# the 'label' column.
ggplot(
  data = toxval.apcra.summary,
  mapping = aes(x = apcra.ret.5p.POD, y = p5.toxval.numeric)
) +
  geom_point(size = 1) +
  geom_text_repel(aes(label = label)) +
  scale_y_log10(
    limits = c(10^-3, 10^5),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  scale_x_log10(
    limits = c(10^-3, 10^5),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  geom_abline(slope = 1, intercept = 0) +
  geom_abline(slope = 1, intercept = 0.5, linetype = "dashed") +
  geom_abline(slope = 1, intercept = -0.5, linetype = "dashed") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  ) +
  xlab("5th percentile APCRA retrospective POD") +
  ylab("5th percentile APCRA prospective POD")
* For these substances, the new ToxVal 5th percentile POD was lower than the
  APCRA retrospective 5th percentile POD by at least 0.5 log10-mg/kg/day.
* None of them seem quite as egregious as the one above - there are no
  separations of 3-4 orders of magnitude between the lowest value and the
  next lowest.
# Value distributions (log10 scale, log10 counts) for the chemicals flagged in
# big.diffs, one panel per chemical name.
diffs <- toxval.apcra.norfds[
  dtxsid %in% big.diffs,
  c("dtxsid", "name", "toxval.apcra.mkd")
]
ggplot(data = diffs, mapping = aes(x = log10(toxval.apcra.mkd))) +
  geom_histogram(bins = 50) +
  theme_bw() +
  scale_y_log10() +
  facet_wrap(~ name)
# Reshape the six percentile columns to long format (one row per chemical and
# percentile; the percentile name goes in 'toxval.p'), then log10-transform
# both the percentile value and the retrospective POD in place.
toxval.summary.long <- melt.data.table(
  toxval.apcra.summary,
  id.vars = c("dtxsid", "casrn", "name", "apcra.ret.5p.POD"),
  measure.vars = c(
    "p5.toxval.numeric", "p10.toxval.numeric", "p15.toxval.numeric",
    "p20.toxval.numeric", "p25.toxval.numeric", "p30.toxval.numeric"
  ),
  variable.name = "toxval.p"
)
col.num <- c("value", "apcra.ret.5p.POD")
toxval.summary.long <- toxval.summary.long[
  , (col.num) := lapply(.SD, log10), .SDcols = col.num
]
head(toxval.summary.long)
# Empirical cumulative distribution of the log10 POD percentiles, one step
# curve per percentile (5th to 30th). Red reference lines mark x = -2, 0 and 2
# and y = 0.90.
# Fix: the legend labels were written as `labels(...)`, which calls
# base::labels() and passes its result positionally instead of setting the
# `labels` argument, so the intended legend text was never applied. They are
# now supplied as `labels = c(...)`.
fig.toxval.ecdf <- ggplot(toxval.summary.long[toxval.p %in% c('p5.toxval.numeric',
                                                              'p10.toxval.numeric',
                                                              'p15.toxval.numeric',
                                                              'p20.toxval.numeric',
                                                              'p25.toxval.numeric',
                                                              'p30.toxval.numeric')], aes(value, color=toxval.p))+
  stat_ecdf(geom='step', size=1.5)+
  scale_y_continuous(trans = 'log10',
                     breaks= c(0.01, 0.1,0.2,0.3,0.4,0.5,0.75, 1))+
  ylab("Cumulative Frequency") +
  xlab('log10 POD quantile')+
  theme_bw() +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(size=10),
    axis.title = element_text(size=12, face='bold'))+
  theme(axis.text.y = element_text(family = "sans", face = "bold", size=12))+
  theme(legend.position="right", legend.title=element_blank())+
  scale_x_continuous(breaks=seq(-5,10,1)) +
  coord_cartesian(xlim = c(-5, 10)) +
  #scale_color_viridis(discrete=TRUE, name='Ratio Type')+
  scale_colour_manual(breaks=c('p5.toxval.numeric',
                               'p10.toxval.numeric',
                               'p15.toxval.numeric',
                               'p20.toxval.numeric',
                               'p25.toxval.numeric',
                               'p30.toxval.numeric'),
                      values=c("#440154FF", "#3B528BFF", "#21908CFF", "#5DC863FF", "#FDE725FF", 'darkmagenta'),
                      # one label per break, in the same order as `breaks`
                      labels=c('5th %-ile','10th %-ile','15th %-ile', '20th %-ile','25th %ile', '30th %ile'))+
  geom_vline(xintercept=-2, lty='dashed', color='red')+
  geom_vline(xintercept=2, lty='dashed', color='red')+
  geom_vline(xintercept=0, color='red')+
  geom_hline(yintercept=0.90, lty='dashed', color='red')
fig.toxval.ecdf
# Violin plot of the log10 POD percentiles (5th to 30th), one violin per
# percentile with the quartiles drawn inside, filled with a six-colour viridis
# palette and labelled '5th' ... '30th' on the x axis and in the legend.
fig.toxval.violin <- ggplot(
  data = toxval.summary.long[toxval.p %in% c(
    "p5.toxval.numeric", "p10.toxval.numeric", "p15.toxval.numeric",
    "p20.toxval.numeric", "p25.toxval.numeric", "p30.toxval.numeric"
  )],
  mapping = aes(x = toxval.p, y = value, fill = toxval.p)
) +
  geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), color = "white", trim = FALSE) +
  #geom_boxplot(width=0.1, color='white')+
  scale_y_continuous(breaks = seq(-3, 2, 0.5)) +
  ylab("ToxVal POD Value, log10-mg/kg/day") +
  xlab("Percentile") +
  theme_bw() +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12, face = "bold")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(legend.position = "right", legend.title = element_blank()) +
  scale_fill_manual(
    values = viridis(6),
    breaks = c(
      "p5.toxval.numeric", "p10.toxval.numeric", "p15.toxval.numeric",
      "p20.toxval.numeric", "p25.toxval.numeric", "p30.toxval.numeric"
    ),
    labels = c("5th", "10th", "15th", "20th", "25th", "30th")
  ) +
  scale_x_discrete(labels = c(
    "p5.toxval.numeric" = "5th",
    "p10.toxval.numeric" = "10th",
    "p15.toxval.numeric" = "15th",
    "p20.toxval.numeric" = "20th",
    "p25.toxval.numeric" = "25th",
    "p30.toxval.numeric" = "30th"
  ))
fig.toxval.violin
# Per-percentile count, median and IQR of the log10 values in the long table.
wilcox <- group_by(toxval.summary.long, toxval.p) %>%
  summarise(
    count = n(),
    median = median(value, na.rm = TRUE),
    IQR = IQR(value, na.rm = TRUE)
  )
wilcox
# Paired two-sided Wilcoxon signed rank test of the per-chemical 5th vs 10th
# percentile values, taken from the untransformed summary table.
# Cleanup: the stray `data=` argument was dropped (both vectors are passed
# explicitly, so it was never used), and `alt` is spelled out as
# `alternative` rather than relying on partial argument matching.
wilcox.test(toxval.apcra.summary$p5.toxval.numeric,
            toxval.apcra.summary$p10.toxval.numeric,
            mu = 0, alternative = 'two.sided', paired = TRUE,
            conf.int = TRUE, conf.level = 0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: toxval.apcra.summary$p5.toxval.numeric and toxval.apcra.summary$p10.toxval.numeric
## V = 0, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -10.400507 -3.360246
## sample estimates:
## (pseudo)median
## -5.33543
# Histogram of every converted POD value (RfDs and cancer slope factors
# excluded) on a log10 x axis limited to 10^-4 .. 10^4.
fig.toxval.distrib <- ggplot() +
  geom_histogram(
    data = toxval.apcra.norfds[, c("dtxsid", "toxval.apcra.mkd")],
    mapping = aes(x = toxval.apcra.mkd)
  ) +
  theme_bw() +
  ylab("Frequency") +
  xlab("ToxVal Numeric POD (log10-mg/kg/day)") +
  theme(
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_log10(
    limits = c(10^-4, 10^4),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  )
fig.toxval.distrib
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Write the two-panel supplementary figure (A: value distribution, B: violin
# of percentiles) to a dated TIFF under output/.
# The three path variables keep their original names and values so any later
# use sees the same strings; note that `file.path` the variable shadows the
# name of base::file.path().
file.dir <- 'output/'
file.name <- paste0('/SuppFig_ToxVal_percentiles_', Sys.Date(), '.tiff')
file.path <- paste0(file.dir, file.name)
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
tiff(file.path, width=8, height=5, units='in', res=450)
# Fix: print the grid explicitly. Top-level auto-printing does not happen when
# the script is run through source(), which would leave the TIFF empty.
print(plot_grid(fig.toxval.distrib, fig.toxval.violin, ncol=2, labels=c("A", "B"), label_size = 14))
dev.off()
# Export the results: one workbook with three sheets (full summary,
# subchronic-only summary, all retained records) and an RData file holding the
# same three tables. Finish by printing the session information.
list_data <- setNames(
  lapply(
    list(
      toxval.apcra.summary,
      toxval.apcra.summary.subchronic,
      toxval.apcra.norfds
    ),
    as.data.frame
  ),
  c(
    "toxval.apcra.pro.summary",
    "toxval.apcra.pro.summ.SUBonly",
    "toxval.apcra.pro.all"
  )
)
write.xlsx(list_data, "./source/apcra_pro_toxval_v9_4_PODs.xlsx")
save(
  toxval.apcra.norfds,
  toxval.apcra.summary,
  toxval.apcra.summary.subchronic,
  file = "./source/apcra_pro_toxval_v9_4_PODs.RData"
)
print(sessionInfo())
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] viridis_0.6.2 viridisLite_0.4.1 tcpl_3.1.0
## [4] tidyr_1.3.0 RMySQL_0.10.25 DBI_1.2.2
## [7] randomForest_4.7-1.1 plotly_4.10.1 openxlsx_4.2.5.2
## [10] jtools_2.2.1 kableExtra_1.3.4 httk_2.3.0
## [13] gplots_3.1.3 ggstance_0.3.6 ggrepel_0.9.3
## [16] DT_0.28 dplyr_1.1.1 DescTools_0.99.48
## [19] data.table_1.14.8 cowplot_1.1.1 caret_6.0-94
## [22] lattice_0.21-8 ggplot2_3.4.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.2 systemfonts_1.0.4 plyr_1.8.8
## [4] lazyeval_0.2.2 splines_4.2.2 listenv_0.9.0
## [7] digest_0.6.31 foreach_1.5.2 htmltools_0.5.8.1
## [10] fansi_1.0.4 magrittr_2.0.3 memoise_2.0.1
## [13] recipes_1.0.5 globals_0.16.2 gower_1.0.1
## [16] svglite_2.1.1 hardhat_1.3.0 timechange_0.2.0
## [19] colorspace_2.1-0 blob_1.2.4 rvest_1.0.3
## [22] mitools_2.4 rbibutils_2.2.13 xfun_0.43
## [25] crayon_1.5.2 jsonlite_1.8.4 Exact_3.2
## [28] survival_3.5-5 iterators_1.0.14 glue_1.6.2
## [31] gtable_0.3.4 ipred_0.9-14 webshot_0.5.4
## [34] future.apply_1.10.0 scales_1.3.0 mvtnorm_1.1-3
## [37] Rcpp_1.0.10 tcplfit2_0.1.6 bit_4.0.5
## [40] proxy_0.4-27 deSolve_1.35 sqldf_0.4-11
## [43] stats4_4.2.2 lava_1.7.2.1 survey_4.1-1
## [46] prodlim_2023.03.31 htmlwidgets_1.6.4 httr_1.4.7
## [49] RColorBrewer_1.1-3 farver_2.1.1 pkgconfig_2.0.3
## [52] nnet_7.3-18 sass_0.4.9 utf8_1.2.3
## [55] RMariaDB_1.2.2 labeling_0.4.3 tidyselect_1.2.1
## [58] rlang_1.1.0 reshape2_1.4.4 munsell_0.5.1
## [61] cellranger_1.1.0 tools_4.2.2 cachem_1.0.7
## [64] cli_3.6.1 gsubfn_0.7 generics_0.1.3
## [67] RSQLite_2.3.1 evaluate_0.23 stringr_1.5.1
## [70] fastmap_1.1.1 yaml_2.3.7 ModelMetrics_1.2.2.2
## [73] knitr_1.46 bit64_4.0.5 zip_2.2.2
## [76] pander_0.6.5 caTools_1.18.2 purrr_1.0.1
## [79] rootSolve_1.8.2.3 future_1.32.0 nlme_3.1-162
## [82] xml2_1.3.3 compiler_4.2.2 rstudioapi_0.14
## [85] e1071_1.7-13 tibble_3.2.1 bslib_0.7.0
## [88] stringi_1.7.12 highr_0.10 Matrix_1.5-4
## [91] vctrs_0.6.1 msm_1.7 pillar_1.9.0
## [94] lifecycle_1.0.4 Rdpack_2.4 jquerylib_0.1.4
## [97] bitops_1.0-7 lmom_2.9 R6_2.5.1
## [100] KernSmooth_2.23-20 gridExtra_2.3 parallelly_1.35.0
## [103] gld_2.6.6 codetools_0.2-19 boot_1.3-28.1
## [106] MASS_7.3-58.3 gtools_3.9.4 chron_2.3-60
## [109] proto_1.0.0 withr_3.0.0 expm_0.999-7
## [112] parallel_4.2.2 hms_1.1.3 grid_4.2.2
## [115] rpart_4.1.19 timeDate_4022.108 class_7.3-21
## [118] rmarkdown_2.26 pROC_1.18.0 numDeriv_2016.8-1.1
## [121] lubridate_1.9.2